Import data

data_orig <- fread(here::here("data", "candy-data_edit_2021.csv"), stringsAsFactors = FALSE)
data <- data_orig
characteristica <- "chocolate+fruity+caramel+peanutyalmondy+nougat+crispedricewafer+hard+bar+pluribus"

Note that we added producer manually.

Clean and edit data

Remove one dime and one quarter as they do not belong to the original distribution and might change regression results.

data <- data[data$competitorname %ni% c("One dime", "One quarter")]

Because we deleted data, we have to readjust percentile rangs of pricepercent and sugarpercent.

# define function
percentile_ranked <- function(a_vector, value) {
  length(sort(a_vector)[a_vector < value]) / length(a_vector)
}
b <- data$sugarpercent
data$sugarpercent <- sapply(data$sugarpercent, percentile_ranked, a_vector = b)
b <- data$pricepercent
data$pricepercent <- sapply(data$pricepercent, percentile_ranked, a_vector = b)

We checked the sugar content manually and is not plausible (Reese Miniatures, Nestle Crunch, Skittles Wild Berry, Milky Way Simply Caramel). Hence, we build a robust data set without the variable sugarpercent.

robust <- data
robust$sugarpercent <- NULL

Next, we set a numerical subset for further use.

data_num <- subset(data, select = -c(competitorname, producer))
robust_num <- subset(robust, select = -c(competitorname, producer))

Descriptive Analysis

Missing values

data %>% missing_plot()

No missing values.

Frequencies

kable(describe(data, omit = TRUE), caption = "Summary statistics", digits = 3)
Summary statistics
vars n mean sd median trimmed mad min max range skew kurtosis se
chocolate 3 83 0.446 0.500 0.000 0.433 0.000 0.000 1.000 1.000 0.214 -1.978 0.055
fruity 4 83 0.458 0.501 0.000 0.448 0.000 0.000 1.000 1.000 0.166 -1.996 0.055
caramel 5 83 0.169 0.377 0.000 0.090 0.000 0.000 1.000 1.000 1.738 1.033 0.041
peanutyalmondy 6 83 0.169 0.377 0.000 0.090 0.000 0.000 1.000 1.000 1.738 1.033 0.041
nougat 7 83 0.084 0.280 0.000 0.000 0.000 0.000 1.000 1.000 2.938 6.711 0.031
crispedricewafer 8 83 0.084 0.280 0.000 0.000 0.000 0.000 1.000 1.000 2.938 6.711 0.031
hard 9 83 0.181 0.387 0.000 0.104 0.000 0.000 1.000 1.000 1.630 0.664 0.042
bar 10 83 0.253 0.437 0.000 0.194 0.000 0.000 1.000 1.000 1.116 -0.764 0.048
pluribus 11 83 0.530 0.502 1.000 0.537 0.000 0.000 1.000 1.000 -0.119 -2.010 0.055
sugarpercent 12 83 0.472 0.286 0.446 0.469 0.375 0.000 0.988 0.988 0.097 -1.183 0.031
pricepercent 13 83 0.464 0.289 0.458 0.459 0.322 0.000 0.976 0.976 0.127 -1.201 0.032
winpercent 14 83 50.585 14.749 48.983 50.102 15.202 22.445 84.180 61.735 0.295 -0.689 1.619

Correlations

corrplot(as.matrix(cor(na.omit(data_num))))

# corrplot for presentation
data_num_deutsch <- subset(data_num, select = -c(sugarpercent, pricepercent))
colnames(data_num_deutsch) <- c(
  "Schokolade",
  "Frucht",
  "Karamell",
  "Nuss",
  "Nougat",
  "Keksartig",
  "Hart",
  "Riegel",
  "Mehrteilig",
  "Beliebtheit"
)
corrplot(as.matrix(cor(na.omit(data_num_deutsch))), tl.col = "black", tl.cex = 1.2, tl.srt = 50)

High negative correlation between chocolate and fruity. High positive correlation between chocolate and winpercent. Negative correlation between pluribus and bar and bar and fruity. Also positive correlation of pricepercent and winpercent

Response functions

plot(data$winpercent, data$sugarpercent)

plot(data$winpercent, data$pricepercent)

plot(data$winpercent, data$fruity)

plot(data$winpercent, data$crispedricewafer)

plot(data$winpercent, data$chocolate)

plot(data$winpercent, data$nougat)

plot(data$winpercent, data$caramel)

plot(data$winpercent, data$pluribus)

plot(data$winpercent, data$bar)

plot(data$winpercent, data$hard)

Producer

Producer frequencies

producer_freq <- table(data$producer)
producer_freq
## 
## American Licorice Company                   Ferrara                    Haribo 
##                         1                        20                         4 
##                   Hershey                    Impact                 Just Born 
##                        17                         1                         1 
##                      Mars                  Mondelez                  Perfetti 
##                        14                         4                         1 
##                   Several                  Spangler                    Storck 
##                         2                         1                         1 
##                SweetWorks                   Tootsie                     Topps 
##                         1                        11                         1 
##                  Washburn                    Welchs                      Zeta 
##                         1                         1                         1
# save for use in presentation
write.xlsx(producer_freq, here::here("output", "2021", "producer_freq.xlsx"))

Regressions

Usual LM for characteristica

We start with an easy linear model with only characteristica.

mod_lm <- lm(winpercent ~ ., data_num)
summary(mod_lm)
## 
## Call:
## lm(formula = winpercent ~ ., data = data_num)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.4077  -6.0740   0.8284   6.7131  23.8284 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       32.3272     5.0052   6.459 1.14e-08 ***
## chocolate         21.2291     4.1161   5.158 2.17e-06 ***
## fruity            11.0000     4.0658   2.705  0.00853 ** 
## caramel            2.7913     3.6848   0.758  0.45125    
## peanutyalmondy    10.6861     3.6418   2.934  0.00450 ** 
## nougat             0.3781     5.7094   0.066  0.94739    
## crispedricewafer   8.8919     5.2566   1.692  0.09511 .  
## hard              -5.8652     3.4633  -1.694  0.09474 .  
## bar                1.7567     5.1490   0.341  0.73398    
## pluribus           0.2133     3.1882   0.067  0.94685    
## sugarpercent       9.8302     4.5527   2.159  0.03421 *  
## pricepercent      -7.3997     5.5215  -1.340  0.18446    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.68 on 71 degrees of freedom
## Multiple R-squared:  0.5461, Adjusted R-squared:  0.4758 
## F-statistic: 7.766 on 11 and 71 DF,  p-value: 1.222e-08
vif(mod_lm)
##        chocolate           fruity          caramel   peanutyalmondy 
##         3.046647         2.986683         1.385823         1.353677 
##           nougat crispedricewafer             hard              bar 
##         1.832239         1.553142         1.292665         3.647083 
##         pluribus     sugarpercent     pricepercent 
##         1.842924         1.223164         1.825270
resettest(mod_lm)
## 
##  RESET test
## 
## data:  mod_lm
## RESET = 3.1783, df1 = 2, df2 = 69, p-value = 0.04782
raintest(mod_lm)
## 
##  Rainbow test
## 
## data:  mod_lm
## Rain = 2.0262, df1 = 42, df2 = 29, p-value = 0.02437
bptest(mod_lm)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod_lm
## BP = 15.488, df = 11, p-value = 0.1612
durbinWatsonTest(mod_lm)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1206196      1.729298   0.124
##  Alternative hypothesis: rho != 0
plot(mod_lm)

Adjusted R-squared of 0.4757844 is reasonable. Additionally, significant values with high effect sizes for chocolate (+), fruity (+), peanutyalmondy (+), crispedricewafer (+), hard (-) and sugarpercent (+). Negative coefficient for pricepercent seems plausible. Linearity assumptions more or less given. VIF okay. No heteroscedasticity. But raintest for linearity fails. Some variables with high leverage (Hershey Whoppers and Snickers Crisp), but could just be due to low sample size. Resettest gives a hint that model might be wrongly specified, hence add more controls.

Check for quadratic term of pricepercent and sugarpercent

mod_lm_sq <- lm(winpercent ~ . + I(pricepercent^2) + I(sugarpercent^2), data_num)
summary(mod_lm_sq)
## 
## Call:
## lm(formula = winpercent ~ . + I(pricepercent^2) + I(sugarpercent^2), 
##     data = data_num)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.3474  -5.4761   0.0792   7.6808  22.4534 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        32.8062     5.6632   5.793 1.88e-07 ***
## chocolate          21.1224     4.0689   5.191 2.01e-06 ***
## fruity             10.8851     4.0321   2.700  0.00872 ** 
## caramel             2.3182     3.6504   0.635  0.52750    
## peanutyalmondy      9.9475     3.6652   2.714  0.00839 ** 
## nougat             -0.2775     5.7985  -0.048  0.96197    
## crispedricewafer    8.7767     5.1994   1.688  0.09592 .  
## hard               -4.8142     3.4880  -1.380  0.17198    
## bar                 4.1663     5.2421   0.795  0.42947    
## pluribus            0.3409     3.1521   0.108  0.91418    
## sugarpercent      -19.7129    18.4106  -1.071  0.28802    
## pricepercent       17.0319    18.3694   0.927  0.35706    
## I(pricepercent^2) -23.0490    17.5241  -1.315  0.19277    
## I(sugarpercent^2)  27.9375    17.6441   1.583  0.11791    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.55 on 69 degrees of freedom
## Multiple R-squared:  0.5691, Adjusted R-squared:  0.4879 
## F-statistic:  7.01 on 13 and 69 DF,  p-value: 2.016e-08
vif(mod_lm_sq)
##         chocolate            fruity           caramel    peanutyalmondy 
##          3.047612          3.006811          1.392179          1.403506 
##            nougat  crispedricewafer              hard               bar 
##          1.934621          1.555489          1.342147          3.869638 
##          pluribus      sugarpercent      pricepercent I(pricepercent^2) 
##          1.843996         20.475311         20.680398         18.820574 
## I(sugarpercent^2) 
##         19.071573
resettest(mod_lm_sq)
## 
##  RESET test
## 
## data:  mod_lm_sq
## RESET = 2.9699, df1 = 2, df2 = 67, p-value = 0.0581
raintest(mod_lm_sq)
## 
##  Rainbow test
## 
## data:  mod_lm_sq
## Rain = 2.3585, df1 = 42, df2 = 27, p-value = 0.01038
bptest(mod_lm_sq)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod_lm_sq
## BP = 15.73, df = 13, p-value = 0.264
durbinWatsonTest(mod_lm_sq)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1627482       1.64358    0.05
##  Alternative hypothesis: rho != 0
plot(mod_lm_sq)

The signs of the quadratic terms might give a hint that the marginal effects are decreasing. But as their effect sizes are really small, not significant and the plots against the dependent variable (see response functions) do not give any hints, we should not overinterpret the results. Model specification test did not get better.

Check for interaction terms

Do normal lm with interaction. Note that we do no interaction with sugarpercent and pricepercent as this would be too hard to interpret and blow the model up. Also note that we do only consider interaction of order 2 as higher orders are not available in the data set.

mod_lm_ia <- lm(paste0("winpercent~(", characteristica, ")^2+."), data_num)
summary(mod_lm_ia)
## 
## Call:
## lm(formula = paste0("winpercent~(", characteristica, ")^2+."), 
##     data = data_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.323  -4.555   0.000   5.123  22.184 
## 
## Coefficients: (18 not defined because of singularities)
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      34.8396    14.9411   2.332   0.0235 *
## chocolate                         9.5774    18.1869   0.527   0.6007  
## fruity                            7.5397    15.7288   0.479   0.6337  
## caramel                          -3.9759    11.6501  -0.341   0.7342  
## peanutyalmondy                    3.6618    17.3984   0.210   0.8341  
## nougat                            8.4206    28.4788   0.296   0.7686  
## crispedricewafer                 -2.1027    10.9835  -0.191   0.8489  
## hard                            -11.0643    13.6241  -0.812   0.4204  
## bar                              11.2866    10.9860   1.027   0.3089  
## pluribus                         -0.7438    14.1169  -0.053   0.9582  
## sugarpercent                     10.0728     4.5903   2.194   0.0326 *
## pricepercent                     -8.4200     5.4547  -1.544   0.1286  
## chocolate:fruity                 -0.3433    21.0781  -0.016   0.9871  
## chocolate:caramel                14.4037    13.8548   1.040   0.3032  
## chocolate:peanutyalmondy         30.1905    13.4085   2.252   0.0285 *
## chocolate:nougat                -12.5211    29.6417  -0.422   0.6744  
## chocolate:crispedricewafer            NA         NA      NA       NA  
## chocolate:hard                        NA         NA      NA       NA  
## chocolate:bar                         NA         NA      NA       NA  
## chocolate:pluribus                6.3170    17.7014   0.357   0.7226  
## fruity:caramel                   -7.1948    15.6814  -0.459   0.6482  
## fruity:peanutyalmondy                 NA         NA      NA       NA  
## fruity:nougat                         NA         NA      NA       NA  
## fruity:crispedricewafer               NA         NA      NA       NA  
## fruity:hard                       5.1245    12.1454   0.422   0.6748  
## fruity:bar                            NA         NA      NA       NA  
## fruity:pluribus                   3.7290    14.8375   0.251   0.8025  
## caramel:peanutyalmondy          -21.6567    14.1757  -1.528   0.1325  
## caramel:nougat                    7.1037    14.8120   0.480   0.6335  
## caramel:crispedricewafer         -2.0528    14.1557  -0.145   0.8853  
## caramel:hard                     22.6575    19.4727   1.164   0.2498  
## caramel:bar                      -4.2842    13.6508  -0.314   0.7549  
## caramel:pluribus                      NA         NA      NA       NA  
## peanutyalmondy:nougat            18.6273    15.6040   1.194   0.2379  
## peanutyalmondy:crispedricewafer       NA         NA      NA       NA  
## peanutyalmondy:hard                   NA         NA      NA       NA  
## peanutyalmondy:bar              -28.6385    13.5621  -2.112   0.0394 *
## peanutyalmondy:pluribus         -12.9916    13.2823  -0.978   0.3325  
## nougat:crispedricewafer               NA         NA      NA       NA  
## nougat:hard                           NA         NA      NA       NA  
## nougat:bar                            NA         NA      NA       NA  
## nougat:pluribus                       NA         NA      NA       NA  
## crispedricewafer:hard                 NA         NA      NA       NA  
## crispedricewafer:bar             17.7104    13.3324   1.328   0.1897  
## crispedricewafer:pluribus             NA         NA      NA       NA  
## hard:bar                              NA         NA      NA       NA  
## hard:pluribus                    -0.1021     7.5475  -0.014   0.9893  
## bar:pluribus                          NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.761 on 53 degrees of freedom
## Multiple R-squared:  0.7169, Adjusted R-squared:  0.562 
## F-statistic: 4.628 on 29 and 53 DF,  p-value: 6.99e-07
# note: cannot calculate vif as "there are aliased coefficients in the model", hence remove linear dependent
ld_vars <- attributes(alias(mod_lm_ia)$Complete)$dimnames[[1]]
mod_lm_ia2 <- lm(paste(paste0("winpercent~(", characteristica, ")^2+."), paste(ld_vars, collapse = "-"), sep = "-"), data_num)
summary(mod_lm_ia2)
## 
## Call:
## lm(formula = paste(paste0("winpercent~(", characteristica, ")^2+."), 
##     paste(ld_vars, collapse = "-"), sep = "-"), data = data_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.323  -4.555   0.000   5.123  22.184 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)               34.8396    14.9411   2.332   0.0235 *
## chocolate                  9.5774    18.1869   0.527   0.6007  
## fruity                     7.5397    15.7288   0.479   0.6337  
## caramel                   -3.9759    11.6501  -0.341   0.7342  
## peanutyalmondy             3.6618    17.3984   0.210   0.8341  
## nougat                     8.4206    28.4788   0.296   0.7686  
## crispedricewafer          -2.1027    10.9835  -0.191   0.8489  
## hard                     -11.0643    13.6241  -0.812   0.4204  
## bar                       11.2866    10.9860   1.027   0.3089  
## pluribus                  -0.7438    14.1169  -0.053   0.9582  
## sugarpercent              10.0728     4.5903   2.194   0.0326 *
## pricepercent              -8.4200     5.4547  -1.544   0.1286  
## chocolate:fruity          -0.3433    21.0781  -0.016   0.9871  
## chocolate:caramel         14.4037    13.8548   1.040   0.3032  
## chocolate:peanutyalmondy  30.1905    13.4085   2.252   0.0285 *
## chocolate:nougat         -12.5211    29.6417  -0.422   0.6744  
## chocolate:pluribus         6.3170    17.7014   0.357   0.7226  
## fruity:caramel            -7.1948    15.6814  -0.459   0.6482  
## fruity:hard                5.1245    12.1454   0.422   0.6748  
## fruity:pluribus            3.7290    14.8375   0.251   0.8025  
## caramel:peanutyalmondy   -21.6567    14.1757  -1.528   0.1325  
## caramel:nougat             7.1037    14.8120   0.480   0.6335  
## caramel:crispedricewafer  -2.0528    14.1557  -0.145   0.8853  
## caramel:hard              22.6575    19.4727   1.164   0.2498  
## caramel:bar               -4.2842    13.6508  -0.314   0.7549  
## peanutyalmondy:nougat     18.6273    15.6040   1.194   0.2379  
## peanutyalmondy:bar       -28.6385    13.5621  -2.112   0.0394 *
## peanutyalmondy:pluribus  -12.9916    13.2823  -0.978   0.3325  
## crispedricewafer:bar      17.7104    13.3324   1.328   0.1897  
## hard:pluribus             -0.1021     7.5475  -0.014   0.9893  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.761 on 53 degrees of freedom
## Multiple R-squared:  0.7169, Adjusted R-squared:  0.562 
## F-statistic: 4.628 on 29 and 53 DF,  p-value: 6.99e-07
resettest(mod_lm_ia2)
## 
##  RESET test
## 
## data:  mod_lm_ia2
## RESET = 1.0939, df1 = 2, df2 = 51, p-value = 0.3426
raintest(mod_lm_ia2)
## 
##  Rainbow test
## 
## data:  mod_lm_ia2
## Rain = 0.52355, df1 = 42, df2 = 11, p-value = 0.9344
bptest(mod_lm_ia2)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod_lm_ia2
## BP = 27.95, df = 29, p-value = 0.5206
durbinWatsonTest(mod_lm_ia2)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.04746414      1.890732   0.466
##  Alternative hypothesis: rho != 0
vif(mod_lm_ia2)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##                chocolate                   fruity                  caramel 
##                71.191685                53.498399                16.580145 
##           peanutyalmondy                   nougat         crispedricewafer 
##                36.978610                54.564221                 8.116112 
##                     hard                      bar                 pluribus 
##                23.942560                19.872215                43.246644 
##             sugarpercent             pricepercent         chocolate:fruity 
##                 1.488276                 2.132169                 4.607145 
##        chocolate:caramel chocolate:peanutyalmondy         chocolate:nougat 
##                17.720456                19.371192                51.333575 
##       chocolate:pluribus           fruity:caramel              fruity:hard 
##                33.760596                 2.549970                16.975454 
##          fruity:pluribus   caramel:peanutyalmondy           caramel:nougat 
##                41.259431                 6.098930                 8.767363 
## caramel:crispedricewafer             caramel:hard              caramel:bar 
##                 6.081766                 3.932069                14.138982 
##    peanutyalmondy:nougat       peanutyalmondy:bar  peanutyalmondy:pluribus 
##                 7.389826                12.374338                 7.049923 
##     crispedricewafer:bar            hard:pluribus 
##                10.385179                 4.322207

Could be that chocolate and peanutyalmondy as well as as peanutyalmondy and bar is very good. But better do a double lasso because of colinearity problem which still exists as can be seen in the VIFs.

rs <- rlasso(paste0("winpercent~(", characteristica, ")^2+."), data_num)
selected <- which(coef(rs)[-c(1:1)] != 0) # = Select relevant variables = #
formula <- paste(c("winpercent ~", names(selected)), collapse = "+")
mod_lm_ia_lasso <- lm(formula, data = data_num)
summary(mod_lm_ia_lasso)
## 
## Call:
## lm(formula = formula, data = data_num)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5600  -7.7555  -0.0628   8.3021  24.7670 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                42.271      1.622  26.059  < 2e-16 ***
## chocolate                  15.011      2.734   5.491 4.58e-07 ***
## chocolate:peanutyalmondy   11.222      3.864   2.904  0.00476 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11 on 80 degrees of freedom
## Multiple R-squared:  0.4571, Adjusted R-squared:  0.4436 
## F-statistic: 33.68 on 2 and 80 DF,  p-value: 2.441e-11
resettest(mod_lm_ia_lasso)
## 
##  RESET test
## 
## data:  mod_lm_ia_lasso
## RESET = 0, df1 = 2, df2 = 78, p-value = 1
raintest(mod_lm_ia_lasso)
## 
##  Rainbow test
## 
## data:  mod_lm_ia_lasso
## Rain = 1.1539, df1 = 42, df2 = 38, p-value = 0.3288
bptest(mod_lm_ia_lasso)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod_lm_ia_lasso
## BP = 1.3067, df = 2, p-value = 0.5203
durbinWatsonTest(mod_lm_ia_lasso)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.07416245      1.835764   0.398
##  Alternative hypothesis: rho != 0
vif(mod_lm_ia_lasso)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##                chocolate chocolate:peanutyalmondy 
##                 1.266024                 1.266024
plot(mod_lm_ia_lasso)

Chocolate and peanutyalmondy is definitely a good idea! Besides, Lasso gives a well specified model.

Robustness with random intercepts for producer

Note that cluster sizes of one are okay, c.f. https://stats.stackexchange.com/questions/388937/minimum-sample-size-per-cluster-in-a-random-effect-model We do not consider random slope models as the sample size is too few for this.

glm_data <- subset(data, select = -c(competitorname))
# get covariates
data_tmp <- subset(glm_data, select = -c(producer, winpercent))
all_covariates <- colnames(data_tmp)
# build formula
formula <- paste(c("winpercent~(1|producer)", all_covariates), collapse = "+")
# do glm
mod_glm <- lmer(formula, data = glm_data, control = lmerControl(optimizer = "bobyqa"))
summary(mod_glm)
## Linear mixed model fit by REML ['lmerMod']
## Formula: winpercent ~ (1 | producer) + chocolate + fruity + caramel +  
##     peanutyalmondy + nougat + crispedricewafer + hard + bar +  
##     pluribus + sugarpercent + pricepercent
##    Data: glm_data
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 554.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.21056 -0.49201  0.00977  0.53617  1.86012 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  producer (Intercept) 31.21    5.587   
##  Residual             85.06    9.223   
## Number of obs: 83, groups:  producer, 18
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)      35.08133    4.82011   7.278
## chocolate        19.92717    3.80867   5.232
## fruity            8.88721    3.75832   2.365
## caramel           1.99811    3.33333   0.599
## peanutyalmondy    7.55000    3.29266   2.293
## nougat           -0.81961    5.01376  -0.163
## crispedricewafer  7.09682    4.60949   1.540
## hard             -3.90837    3.33892  -1.171
## bar              -0.02138    4.60408  -0.005
## pluribus         -0.86395    2.98638  -0.289
## sugarpercent      8.46224    4.22019   2.005
## pricepercent     -7.54571    5.14366  -1.467
## 
## Correlation of Fixed Effects:
##             (Intr) choclt fruity caraml pntylm nougat crspdr hard   bar   
## chocolate   -0.462                                                        
## fruity      -0.671  0.579                                                 
## caramel     -0.229  0.136  0.272                                          
## peantylmndy -0.196 -0.032  0.227  0.121                                   
## nougat      -0.032  0.039  0.040 -0.189 -0.089                            
## crispdrcwfr  0.019 -0.092  0.017 -0.143  0.181  0.384                     
## hard        -0.154 -0.023 -0.171  0.022  0.085  0.002  0.037              
## bar         -0.240 -0.135  0.126  0.090  0.130 -0.483 -0.312  0.109       
## pluribus    -0.500  0.074  0.173  0.201  0.186 -0.033  0.004  0.194  0.522
## sugarpercnt -0.197  0.046 -0.007 -0.122 -0.027 -0.071 -0.019 -0.185  0.075
## pricepercnt -0.126 -0.153 -0.025 -0.116 -0.166  0.153 -0.060  0.063 -0.345
##             plurbs sgrprc
## chocolate                
## fruity                   
## caramel                  
## peantylmndy              
## nougat                   
## crispdrcwfr              
## hard                     
## bar                      
## pluribus                 
## sugarpercnt -0.084       
## pricepercnt -0.162 -0.347
r.squaredGLMM(mod_glm)
##           R2m       R2c
## [1,] 0.423914 0.5785676

Check validity of random intercepts

# ICC
icc_output <- performance::icc(mod_glm)
as.data.frame(icc_output)
ICC_adjustedICC_unadjustedoptional
0.2680.155FALSE
# cAIC
cAIC(mod_lm)
##                                                    
##                Conditional log-likelihood:  -307.86
##                        Degrees of freedom:    13.00
##  Conditional Akaike information criterion:   641.71
cAIC(mod_glm)
##                                                    
##                Conditional log-likelihood:  -292.79
##                        Degrees of freedom:    20.31
##  Conditional Akaike information criterion:   626.19
# inspect random intercepts
ranef(mod_glm)
## $producer
##                           (Intercept)
## American Licorice Company  -2.6120017
## Ferrara                    -1.5794195
## Haribo                      2.9841001
## Hershey                     4.1525747
## Impact                     -0.1986142
## Just Born                  -0.4482936
## Mars                        8.1069462
## Mondelez                    3.5070438
## Perfetti                    1.2200314
## Several                    -2.0302559
## Spangler                   -1.7543782
## Storck                      2.5011972
## SweetWorks                 -5.5055836
## Tootsie                    -6.8833535
## Topps                      -0.9701245
## Washburn                   -1.6826901
## Welchs                      0.2944472
## Zeta                        0.8983742
## 
## with conditional variances for "producer"

ICC above 0.10, random intercepts differ, cAIC for random intercept model is smaller, hence random intercept model valid and has better pseudo R-squared. Results robust with random intercepts but effect sizes got smaller. Also nougat, bar and pluribus went negative, so do not produce it like this!

Robustness without sugarpercent

mod_wo_sugar <- lm(winpercent ~ ., robust_num)
summary(mod_wo_sugar)
## 
## Call:
## lm(formula = winpercent ~ ., data = robust_num)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.3393  -5.7765   0.2818   6.7984  21.3206 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       34.2670     5.0476   6.789  2.7e-09 ***
## chocolate         21.2146     4.2195   5.028  3.5e-06 ***
## fruity            11.3362     4.1649   2.722  0.00814 ** 
## caramel            4.1148     3.7248   1.105  0.27296    
## peanutyalmondy    10.9349     3.7314   2.930  0.00453 ** 
## nougat             1.7557     5.8161   0.302  0.76362    
## crispedricewafer   9.1631     5.3871   1.701  0.09327 .  
## hard              -4.6263     3.5013  -1.321  0.19058    
## bar                0.9368     5.2639   0.178  0.85925    
## pluribus           0.8337     3.2550   0.256  0.79859    
## pricepercent      -3.5146     5.3512  -0.657  0.51341    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.95 on 72 degrees of freedom
## Multiple R-squared:  0.5163, Adjusted R-squared:  0.4491 
## F-statistic: 7.685 on 10 and 72 DF,  p-value: 3.178e-08
resettest(mod_wo_sugar)
## 
##  RESET test
## 
## data:  mod_wo_sugar
## RESET = 2.894, df1 = 2, df2 = 70, p-value = 0.06201
raintest(mod_wo_sugar)
## 
##  Rainbow test
## 
## data:  mod_wo_sugar
## Rain = 1.4175, df1 = 42, df2 = 30, p-value = 0.1602
bptest(mod_wo_sugar)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod_wo_sugar
## BP = 10.235, df = 10, p-value = 0.4201
durbinWatsonTest(mod_wo_sugar)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.08178324      1.816866   0.312
##  Alternative hypothesis: rho != 0
vif(mod_wo_sugar)
##        chocolate           fruity          caramel   peanutyalmondy 
##         3.046639         2.982304         1.347473         1.352322 
##           nougat crispedricewafer             hard              bar 
##         1.809357         1.552256         1.257185         3.627245 
##         pluribus     pricepercent 
##         1.827957         1.631432
plot(mod_wo_sugar)

The previous results for the linear model seem to be still robust. Only hard is not significant any more. But the model specification tests are better! Next look at the quadratic terms.

mod_lm_sq_wo_sugar <- lm(winpercent ~ . + I(pricepercent^2), robust_num)
summary(mod_lm_sq_wo_sugar)
## 
## Call:
## lm(formula = winpercent ~ . + I(pricepercent^2), data = robust_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.235  -6.556   1.140   7.343  21.384 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        30.5545     5.5187   5.537 4.89e-07 ***
## chocolate          21.1084     4.1766   5.054 3.24e-06 ***
## fruity             10.7972     4.1361   2.611   0.0110 *  
## caramel             3.6349     3.6989   0.983   0.3291    
## peanutyalmondy      9.8040     3.7615   2.606   0.0111 *  
## nougat             -0.4861     5.9279  -0.082   0.9349    
## crispedricewafer    8.8070     5.3363   1.650   0.1033    
## hard               -3.8891     3.4964  -1.112   0.2698    
## bar                 2.2732     5.2777   0.431   0.6680    
## pluribus            0.8172     3.2215   0.254   0.8005    
## pricepercent       22.5695    17.3084   1.304   0.1965    
## I(pricepercent^2) -27.1161    17.1302  -1.583   0.1179    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.83 on 71 degrees of freedom
## Multiple R-squared:  0.5328, Adjusted R-squared:  0.4604 
## F-statistic: 7.361 on 11 and 71 DF,  p-value: 3.071e-08
resettest(mod_lm_sq_wo_sugar)
## 
##  RESET test
## 
## data:  mod_lm_sq_wo_sugar
## RESET = 2.2925, df1 = 2, df2 = 69, p-value = 0.1087
raintest(mod_lm_sq_wo_sugar)
## 
##  Rainbow test
## 
## data:  mod_lm_sq_wo_sugar
## Rain = 1.8759, df1 = 42, df2 = 29, p-value = 0.03913
bptest(mod_lm_sq_wo_sugar)
## 
##  studentized Breusch-Pagan test
## 
## data:  mod_lm_sq_wo_sugar
## BP = 9.7106, df = 11, p-value = 0.5566
durbinWatsonTest(mod_lm_sq_wo_sugar)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1325029      1.718943   0.144
##  Alternative hypothesis: rho != 0
vif(mod_lm_sq_wo_sugar)
##         chocolate            fruity           caramel    peanutyalmondy 
##          3.047426          3.002648          1.356588          1.402931 
##            nougat  crispedricewafer              hard               bar 
##          1.918889          1.555018          1.279895          3.722510 
##          pluribus      pricepercent I(pricepercent^2) 
##          1.827976         17.424980         17.067684
plot(mod_lm_sq_wo_sugar)

Concerning adjusted R^2 and model specification test, this is the best model so far. Let us take a look at the random intercept model.

glm_data <- subset(robust, select = -c(competitorname))
# get covariates
data_tmp <- subset(glm_data, select = -c(producer, winpercent))
all_covariates <- colnames(data_tmp)
# build formula
formula <- paste(c("winpercent~(1|producer)", all_covariates), collapse = "+")
# do glm
mod_glm_wo_sugar <- lmer(formula, data = glm_data, control = lmerControl(optimizer = "bobyqa"))
summary(mod_glm_wo_sugar)
## Linear mixed model fit by REML ['lmerMod']
## Formula: winpercent ~ (1 | producer) + chocolate + fruity + caramel +  
##     peanutyalmondy + nougat + crispedricewafer + hard + bar +  
##     pluribus + pricepercent
##    Data: glm_data
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 563.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.33289 -0.48834  0.04583  0.58399  1.97023 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  producer (Intercept) 31.64    5.625   
##  Residual             88.88    9.427   
## Number of obs: 83, groups:  producer, 18
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)      36.98003    4.82074   7.671
## chocolate        19.57678    3.88561   5.038
## fruity            8.95990    3.83723   2.335
## caramel           2.82292    3.38032   0.835
## peanutyalmondy    7.74821    3.36337   2.304
## nougat           -0.08993    5.11131  -0.018
## crispedricewafer  7.28770    4.71046   1.547
## hard             -2.69400    3.34803  -0.805
## bar              -0.70627    4.69057  -0.151
## pluribus         -0.35710    3.03803  -0.118
## pricepercent     -3.94503    4.92509  -0.801
## 
## Correlation of Fixed Effects:
##             (Intr) choclt fruity caraml pntylm nougat crspdr hard   bar   
## chocolate   -0.464                                                        
## fruity      -0.687  0.580                                                 
## caramel     -0.261  0.143  0.273                                          
## peantylmndy -0.205 -0.032  0.227  0.119                                   
## nougat      -0.047  0.042  0.039 -0.200 -0.092                            
## crispdrcwfr  0.015 -0.091  0.017 -0.147  0.181  0.384                     
## hard        -0.198 -0.014 -0.174 -0.001  0.081 -0.011  0.034              
## bar         -0.231 -0.139  0.127  0.100  0.131 -0.481 -0.312  0.126       
## pluribus    -0.529  0.079  0.173  0.193  0.184 -0.040  0.002  0.183  0.531
## pricepercnt -0.212 -0.146 -0.029 -0.170 -0.186  0.137 -0.071 -0.002 -0.341
##             plurbs
## chocolate         
## fruity            
## caramel           
## peantylmndy       
## nougat            
## crispdrcwfr       
## hard              
## bar               
## pluribus          
## pricepercnt -0.204
r.squaredGLMM(mod_glm_wo_sugar)
##            R2m       R2c
## [1,] 0.3937094 0.5528884
# ICC
icc_output <- performance::icc(mod_glm_wo_sugar)
as.data.frame(icc_output)
ICC_adjustedICC_unadjustedoptional
0.2630.159FALSE
# cAIC
cAIC(mod_glm_wo_sugar)
##                                                    
##                Conditional log-likelihood:  -295.13
##                        Degrees of freedom:    19.23
##  Conditional Akaike information criterion:   628.72
cAIC(mod_glm_wo_sugar)
##                                                    
##                Conditional log-likelihood:  -295.13
##                        Degrees of freedom:    19.23
##  Conditional Akaike information criterion:   628.72
# inspect random intercepts
ranef(mod_glm_wo_sugar)
## $producer
##                            (Intercept)
## American Licorice Company -2.049570340
## Ferrara                   -1.881137952
## Haribo                     2.749819075
## Hershey                    3.820180384
## Impact                    -0.999315229
## Just Born                  0.542074150
## Mars                       9.174189093
## Mondelez                   2.171138661
## Perfetti                   2.204811341
## Several                   -2.187326495
## Spangler                  -0.968874497
## Storck                     1.521057479
## SweetWorks                -5.564006997
## Tootsie                   -6.825687836
## Topps                     -1.090281154
## Washburn                  -1.046922866
## Welchs                    -0.004999566
## Zeta                       0.434852750
## 
## with conditional variances for "producer"

It is also robust. Last, check interaction terms. Do normal lm with interaction without sugarpercent:

mod_lm_ia_wo_sugar <- lm(paste0("winpercent~(", characteristica, ")^2+."), robust_num)
summary(mod_lm_ia_wo_sugar)
## 
## Call:
## lm(formula = paste0("winpercent~(", characteristica, ")^2+."), 
##     data = robust_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.287  -5.030   0.000   5.086  19.716 
## 
## Coefficients: (18 not defined because of singularities)
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      34.55554   15.45935   2.235   0.0296 *
## chocolate                        10.89272   18.80816   0.579   0.5649  
## fruity                           10.35803   16.22061   0.639   0.5258  
## caramel                          -0.85160   11.96425  -0.071   0.9435  
## peanutyalmondy                    1.45926   17.97258   0.081   0.9356  
## nougat                           17.04178   29.18591   0.584   0.5617  
## crispedricewafer                  2.11933   11.18917   0.189   0.8505  
## hard                             -8.49354   14.04498  -0.605   0.5479  
## bar                              11.38077   11.36742   1.001   0.3212  
## pluribus                          3.30266   14.48198   0.228   0.8205  
## pricepercent                     -4.70208    5.36497  -0.876   0.3847  
## chocolate:fruity                 -0.27138   21.81002  -0.012   0.9901  
## chocolate:caramel                13.07220   14.32210   0.913   0.3654  
## chocolate:peanutyalmondy         35.15443   13.67527   2.571   0.0129 *
## chocolate:nougat                -18.20246   30.55374  -0.596   0.5538  
## chocolate:crispedricewafer             NA         NA      NA       NA  
## chocolate:hard                         NA         NA      NA       NA  
## chocolate:bar                          NA         NA      NA       NA  
## chocolate:pluribus                2.61947   18.23292   0.144   0.8863  
## fruity:caramel                   -8.07135   16.22061  -0.498   0.6208  
## fruity:peanutyalmondy                  NA         NA      NA       NA  
## fruity:nougat                          NA         NA      NA       NA  
## fruity:crispedricewafer                NA         NA      NA       NA  
## fruity:hard                       3.41421   12.54127   0.272   0.7865  
## fruity:bar                             NA         NA      NA       NA  
## fruity:pluribus                   0.06859   15.25538   0.004   0.9964  
## caramel:peanutyalmondy          -21.43316   14.66755  -1.461   0.1497  
## caramel:nougat                    2.25620   15.15494   0.149   0.8822  
## caramel:crispedricewafer         -5.02959   14.57986  -0.345   0.7315  
## caramel:hard                     17.88360   20.02275   0.893   0.3757  
## caramel:bar                      -0.67403   14.02180  -0.048   0.9618  
## caramel:pluribus                       NA         NA      NA       NA  
## peanutyalmondy:nougat            16.51993   16.11517   1.025   0.3099  
## peanutyalmondy:crispedricewafer        NA         NA      NA       NA  
## peanutyalmondy:hard                    NA         NA      NA       NA  
## peanutyalmondy:bar              -31.09163   13.98532  -2.223   0.0304 *
## peanutyalmondy:pluribus         -13.52027   13.74120  -0.984   0.3295  
## nougat:crispedricewafer                NA         NA      NA       NA  
## nougat:hard                            NA         NA      NA       NA  
## nougat:bar                             NA         NA      NA       NA  
## nougat:pluribus                        NA         NA      NA       NA  
## crispedricewafer:hard                  NA         NA      NA       NA  
## crispedricewafer:bar             12.97754   13.61367   0.953   0.3447  
## crispedricewafer:pluribus              NA         NA      NA       NA  
## hard:bar                               NA         NA      NA       NA  
## hard:pluribus                     0.62229    7.80205   0.080   0.9367  
## bar:pluribus                           NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.1 on 54 degrees of freedom
## Multiple R-squared:  0.6912, Adjusted R-squared:  0.5311 
## F-statistic: 4.317 on 28 and 54 DF,  p-value: 2.063e-06

Same as before, but better do a double lasso because of colinearity problem.

rs <- rlasso(winpercent ~ .^2, robust_num)
selected <- which(coef(rs)[-c(1:1)] != 0) # = Select relevant variables = #
formula <- paste(c("winpercent ~", names(selected)), collapse = "+")
mod_lm_ia_lasso_wo_sugar <- lm(formula, data = data_num)
summary(mod_lm_ia_lasso_wo_sugar)
## 
## Call:
## lm(formula = formula, data = data_num)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.6242  -7.7223  -0.3664   8.4296  24.7670 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              42.271      1.686  25.068   <2e-16 ***
## chocolate                12.270      5.339   2.298   0.0242 *  
## chocolate:pricepercent   10.217      7.531   1.357   0.1787    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.44 on 80 degrees of freedom
## Multiple R-squared:  0.4134, Adjusted R-squared:  0.3987 
## F-statistic: 28.19 on 2 and 80 DF,  p-value: 5.419e-10
vif(mod_lm_ia_lasso_wo_sugar)
## there are higher-order terms (interactions) in this model
## consider setting type = 'predictor'; see ?vif
##              chocolate chocolate:pricepercent 
##               4.468427               4.468427
plot(mod_lm_ia_lasso_wo_sugar)

Chocolate and peanutyalmondy is not important any more. But note that R-square is very low.

Prediction

We want to find the candy with the highest winpercent. Given only the data, this is:

print(data[data$winpercent == max(data$winpercent), ])
##               competitorname producer chocolate fruity caramel peanutyalmondy
##                       <char>   <char>     <int>  <int>   <int>          <int>
## 1: ReeseÕs Peanut Butter cup  Hershey         1      0       0              1
##    nougat crispedricewafer  hard   bar pluribus sugarpercent pricepercent
##     <int>            <int> <int> <int>    <int>        <num>        <num>
## 1:      0                0     0     0        0    0.7108434    0.6385542
##    winpercent
##         <num>
## 1:   84.18029

Now we can check if there is a better solution by using the lm-model and inventing observations. We use a grid for that.

inventions <- read.csv(text = paste(colnames(data_num), collapse = ","))
inventions$winpercent <- NULL
for (choc in seq(0, 1)) {
  for (fruit in seq(0, 1)) {
    for (caram in seq(0, 1)) {
      for (peanut in seq(0, 1)) {
        for (noug in seq(0, 1)) {
          for (crisped in seq(0, 1)) {
            for (har in seq(0, 1)) {
              for (ba in seq(0, 1)) {
                for (plur in seq(0, 1)) {
                  for (sugar in seq(0, 1, 0.2)) {
                    for (price in seq(0, 1, 0.2)) {
                      inventions[nrow(inventions) + 1, ] <- c(choc, fruit, caram, peanut, noug, crisped, har, ba, plur, sugar, price)
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

Now we do the prediction but we use a model without sugar in order to prevent unrealistic products as this variable might be wrong. Additionally, the model specification for models without sugar were better.We do not use the interaction model as due to colinearity the prediction might be wrong. In the end we use mod_lm_sq_wo_sugar as this has the best specification tests and adjusted R^2.

inventions_w_win <- inventions
# filter with row value <= 5, i.e. max(data$sum)
inventions_w_win$sum <- (
  inventions_w_win$chocolate +
    inventions_w_win$fruity +
    inventions_w_win$caramel +
    inventions_w_win$peanutyalmondy +
    inventions_w_win$nougat +
    inventions_w_win$crispedricewafer +
    inventions_w_win$hard +
    inventions_w_win$bar +
    inventions_w_win$pluribus
)
inventions_w_win <- inventions_w_win[inventions_w_win$sum <= 5, ]
# predict
win <- predict(mod_lm_sq_wo_sugar, inventions_w_win)
inventions_w_win <- cbind(inventions_w_win, win)
inventions_w_win <- inventions_w_win[order(-win), ]
head(inventions_w_win, 20)
chocolatefruitycaramelpeanutyalmondynougatcrispedricewaferhardbarpluribussugarpercentpricepercentsumwin
1111010000  0.4589.4
1111010000.20.4589.4
1111010000.40.4589.4
1111010000.60.4589.4
1111010000.80.4589.4
1111010001  0.4589.4
1111010000  0.6588.5
1111010000.20.6588.5
1111010000.40.6588.5
1111010000.60.6588.5
1111010000.80.6588.5
1111010001  0.6588.5
1111010000  0.2588.1
1111010000.20.2588.1
1111010000.40.2588.1
1111010000.60.2588.1
1111010000.80.2588.1
1111010001  0.2588.1
1101010100  0.4588  
1101010100.20.4588  
write.xlsx(head(inventions_w_win, 20), here::here("output", "2021", "predictions.xlsx"))

Last, save the regression coefficients for use in the PPT

hux <- huxtablereg(
  list(
    mod_lm,
    mod_lm_sq,
    mod_lm_ia,
    mod_lm_ia_lasso,
    mod_glm,
    mod_wo_sugar,
    mod_lm_sq_wo_sugar
  ),
  stars = numeric(0),
  single.row = TRUE
)
quick_pptx(hux, file = here::here("output", "2021", "regs.pptx"))

Special regression

We do some special isolated regression to have a better knowledge of certain combinations, e.g. fruit gums.

Fruit gums

mod_fruit_gum <- lm(winpercent ~ pluribus * fruity, data_num)
summary(mod_fruit_gum)
## 
## Call:
## lm(formula = winpercent ~ pluribus * fruity, data = data_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.634  -9.972  -1.069   9.538  24.621 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       60.865      2.487  24.471  < 2e-16 ***
## pluribus         -12.050      3.933  -3.064  0.00299 ** 
## fruity           -19.614      4.484  -4.374 3.68e-05 ***
## pluribus:fruity   16.244      5.984   2.715  0.00815 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.92 on 79 degrees of freedom
## Multiple R-squared:  0.2603, Adjusted R-squared:  0.2322 
## F-statistic: 9.265 on 3 and 79 DF,  p-value: 2.545e-05

Both coefficients are negative, but together it seems okay. Consider that in the interaction model the fruity:pluribus coefficient was close to zero.